library(tidyverse)
library(magrittr)
library(nomisr)
library(skimr)
library(ggrepel)
library(ggExtra)
require(scales)
library(classInt)
library(sf)
library(ggmap)
library(ggsflabel)
library(mapsf)

This document uses the Demography and migration data for England and Wales from the Census 2021 released on 2 November 2022, as well as Output Areas (December 2021) Boundaries Generalised Clipped EW (BGC) and the OAs to LSOAs to MSOAs (2021) to Local Enterprise Partnership to LAD (May 2022) Lookup in England. Source: Office for National Statistics licensed under the Open Government Licence v3.0. Contains both Ordnance Survey and ONS Intellectual Property Rights. Contains OS data © Crown copyright and database right 2022.

Data

The 2021 Census data have been downloaded to the storage folder using the scripts 002-download-data-2022-11-02.R and 003-download-oa-geometries.R available in the 000-data folder.

Load data

Load 2021 Census data from the storage folder.

population_density_2021 <- read_csv(
    "../storage/atc-ts-demmig-ur-pd-oa-oa.csv"
  ) %>% 
  rename(
    OA21CD = `Output Areas Code`,
    OA21NM = `Output Areas Label`,
    population_density = `Population Density`
  )
## Rows: 188880 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Output Areas Code, Output Areas Label
## dbl (1): Population Density
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
oa_2021_geo <-
  st_read("../storage/Leicester_2021_OAs.geojson")
## Reading layer `Leicester_2021_OAs' from data source 
##   `/Users/sds27/repos/2021-census-analysis/storage/Leicester_2021_OAs.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 1010 features and 13 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -1.215981 ymin: 52.5807 xmax: -1.046165 ymax: 52.69149
## Geodetic CRS:  WGS 84
population_2021_est <-
  oa_2021_geo %>% 
  st_transform(crs = 27700) %>% 
  mutate(
    area = st_area(geometry) %>% as.numeric()
  ) %>% 
  st_drop_geometry() %>% 
  left_join(
    population_density_2021
  ) %>% 
  mutate(
    population = as.numeric(round(population_density * (area / 1000000)))
  ) %>% 
  select(OA21CD, population_density, area, population)
## Joining, by = "OA21CD"
households_2021 <- read_csv(
    "../storage/atc-ts-demmig-hh-ct-oa-oa.csv"
  ) %>% 
  rename(
    OA21CD = `Output Areas Code`,
    OA21NM = `Output Areas Label`,
    households = `Count`
  )
## Rows: 188880 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Output Areas Code, Output Areas Label
## dbl (1): Count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pop_hh_2021 <-
  oa_2021_geo %>% 
  left_join(
    population_2021_est
  ) %>% 
  left_join(
    households_2021
  ) %>% 
  mutate(
    pop_hh_2021 = population / households
  )
## Joining, by = "OA21CD"
## Joining, by = "OA21CD"
pop_hh_2011 <-
  st_read("../storage/Leicester_2011_OAs_population_households.geojson") %>% 
  mutate(
    pop_hh_2011 = all_persons_2011 / households_2011
  )
## Reading layer `Leicester_2011_OAs_population_households' from data source 
##   `/Users/sds27/repos/2021-census-analysis/storage/Leicester_2011_OAs_population_households.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 969 features and 13 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -1.215981 ymin: 52.5807 xmax: -1.046165 ymax: 52.69149
## Geodetic CRS:  WGS 84

Check density estimation

Checking for outliers in the density estimation.

popdens_area_model <-
  population_2021_est %>% 
  lm(
    log10(population_density) ~
      log10(area),
    data = .
  )

popdens_area_model %>% 
  summary()
## 
## Call:
## lm(formula = log10(population_density) ~ log10(area), data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53130 -0.07372  0.00219  0.07265  0.81394 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.15956    0.05115  159.52   <2e-16 ***
## log10(area) -0.91804    0.01092  -84.05   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1272 on 1008 degrees of freedom
## Multiple R-squared:  0.8751, Adjusted R-squared:  0.875 
## F-statistic:  7065 on 1 and 1008 DF,  p-value: < 2.2e-16
population_2021_est_lm <-
  population_2021_est %>% 
  mutate(
    cooksd =
      popdens_area_model %>% 
      cooks.distance()
  )
population_2021_est_lm %>% 
  mutate(
    plot_label = if_else(
      cooksd > 0.01, 
      OA21CD, NA_character_
    )
  ) %>% 
  ggplot(
    aes(
      x = area,
      y = population_density,
      colour = cooksd,
      label = plot_label
    )
  ) +
  scale_color_viridis_c(option = "inferno", direction = -1) +
  geom_point() +
  geom_text_repel() +
  scale_x_log10() +
  scale_y_log10() +
  theme_bw()
## Warning: Removed 991 rows containing missing values (`geom_text_repel()`).

Exploring population and households

leicester_ggmap <-
  ggmap(
    get_stamenmap(
      c(left = -1.225, bottom = 52.575, right = -1.0, top = 52.7),
      zoom = 14,
      maptype = "toner"
    )
  )
pop_hh_maps_values <-
  pop_hh_2021 %>% 
  pull(pop_hh_2021) %>% 
  c(
    pop_hh_2011 %>% 
    pull(pop_hh_2011)
  )

pop_hh_maps_breaks <-
  classIntervals(
    c(
      pop_hh_maps_values %>% min() %>% subtract(0.000001),
      pop_hh_maps_values,
      pop_hh_maps_values %>% max() %>% add(0.000001)
    ), 
    n = 9, 
    style = "jenks"
  ) %$%
  brks

pop_hh_maps_cats <-
  pop_hh_maps_values %>% 
  cut(pop_hh_maps_breaks) %>% 
  unique()